library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data_dir <- "/Users/benjaminsmith/Dropbox (University of Oregon)/UO-SAN Lab/Berkman Lab/Devaluation/analysis_files/data/"

preprocessed_data_filepath <- paste0(data_dir,"cloudresearch_survey_results/orfcs_preprocessed_anon_by_subj.csv")
library(readr)
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
preprocessed_data <- readr::read_csv(preprocessed_data_filepath)
## Rows: 882 Columns: 116
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): SID, survey_name.x, aSES_03, aSES_04a_3_TEXT, aSES_16, DEMO_5_4_TE...
## dbl (85): EAFH_fastfoodpast30days, EAFH_fastfoodpastweek, EAFH_fastfoodpastw...
## lgl (17): DEMO_3b_White, DEMO_3b_Black, DEMO_3b_American Indian or Alaska Na...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

A few preprocessed data points:

preprocessed_data$Gender <- factor(as.numeric(preprocessed_data$DEMO_5),levels=1:4,labels=c("Female","Male","Non-binary","Other"))

print(table(preprocessed_data$Gender))
## 
##     Female       Male Non-binary      Other 
##        531        243         13          1
preprocessed_data$Gender3C <-preprocessed_data$Gender
preprocessed_data$Gender3C[preprocessed_data$Gender=="Non-binary"] <- "Other"
preprocessed_data$Gender3C <- as.factor(as.character(preprocessed_data$Gender3C))


preprocessed_data$RuralZIP <- as.factor(preprocessed_data$RuralZIP)
#race*ethnicity
#
#Categories will be "White Alone" "Hispanic White", "Other Hispanic", "Japanese"... and any group with 20 
#apply(preprocessed_data[colnames(preprocessed_data)[grepl("DEMO_3b",colnames(preprocessed_data))]],2,table)
race_response_count <- rowSums(preprocessed_data[c("Demo_3a_IsHispanicLatinoSpanish",colnames(preprocessed_data)[grepl("DEMO_3b.*[^TEXT]$",colnames(preprocessed_data))])])
preprocessed_data$RaceCategorical <- ""
preprocessed_data$RaceCategorical[(race_response_count<=1) & (preprocessed_data$DEMO_3b_White==TRUE)] <- "WhiteOnly"
#preprocessed_data$RaceCategorical[(preprocessed_data$DEMO_3b_White==TRUE) & preprocessed_data$Demo_3a_IsHispanicLatinoSpanish & (race_response_count==2)] <- "WhiteHispanic"
#preprocessed_data$RaceCategorical[preprocessed_data$DEMO_3b_Japanese] <- "Japanese"
preprocessed_data$RaceCategorical[preprocessed_data$Demo_3a_IsHispanicLatinoSpanish] <- "HispanicAnyRace"
preprocessed_data$RaceCategorical[(
  preprocessed_data$DEMO_3b_Japanese |
  preprocessed_data$DEMO_3b_Chinese | 
    preprocessed_data$DEMO_3b_Vietnamese | 
    preprocessed_data$DEMO_3b_Filipino | 
    preprocessed_data$DEMO_3b_Korean | 
    preprocessed_data$`DEMO_3b_Asian Indian`
)] <- "Asian"
preprocessed_data$RaceCategorical[preprocessed_data$`DEMO_3b_Native Hawaiian` | preprocessed_data$DEMO_3b_Samoan | preprocessed_data$DEMO_3b_Chamorro] <- "HawaiianOtherPacificIslander"
preprocessed_data$RaceCategorical[preprocessed_data$RaceCategorical==""]<-"OtherRace"
preprocessed_data$RaceCategorical[race_response_count==0]<-"RaceNotReported"
#nb: "OtherHispanic" excludes respondents in "HawaiianOtherPacificIslander", and "Asian" Categories.
preprocessed_data$RaceCategorical <- factor(preprocessed_data$RaceCategorical,levels=c("WhiteOnly","Asian","HispanicAnyRace","HawaiianOtherPacificIslander","OtherRace","RaceNotReported"))


preprocessed_data$RuralZIP <- as.factor(preprocessed_data$RuralZIP)

reference for the analysis plan is at: https://osf.io/k7jav

From the analysis plan:

People in more safer community environments and communities with healthier food environments (a) will have better eating behavior and (b) will be in better health, after controlling for perceived stress.

Let’s take a look at our distribution of eating behavior and health.

First, health outcome, as measured by BMI:

ggplot(preprocessed_data,aes(WEIGHT, BMI))+geom_point()+labs(title="pre-clean")
## Warning: Removed 217 rows containing missing values (geom_point).

#need to remove this one data point for data quality issue.
preprocessed_data[!is.na(preprocessed_data$WEIGHT) & preprocessed_data$WEIGHT>500,c("WEIGHT","WEIGHT_kg","BMI")] <- NA

ggplot(preprocessed_data,aes(WEIGHT, BMI))+geom_point()+labs(title="post-clean")
## Warning: Removed 218 rows containing missing values (geom_point).

And health:

hist(preprocessed_data$cancer_promoting_minus_preventing_FFQ,breaks = 20)

hist(preprocessed_data$FFQ_cancer_promoting,breaks = 20)

#let's create a log measure of this because it is right-skewed
#though perhaps the danger is right-skewed as well? so I'm not sure.
preprocessed_data$FFQ_cancer_promoting_log <- log(preprocessed_data$FFQ_cancer_promoting)
preprocessed_data$FFQ_cancer_preventing_log <- log(preprocessed_data$FFQ_cancer_preventing)

hist(preprocessed_data$FFQ_cancer_promoting_log,breaks = 20)

ggplot(preprocessed_data,aes(FFQ_cancer_promoting, BMI))+geom_point()+labs(title="post-clean")
## Warning: Removed 218 rows containing missing values (geom_point).

ggplot(preprocessed_data,aes(cancer_promoting_minus_preventing_FFQ, BMI))+geom_point()+labs(title="post-clean")
## Warning: Removed 218 rows containing missing values (geom_point).

ggplot(preprocessed_data,aes(FFQ_cancer_preventing_log, BMI))+geom_point()+labs(title="post-clean")
## Warning: Removed 218 rows containing missing values (geom_point).

cor.test(preprocessed_data$cancer_promoting_minus_preventing_FFQ,preprocessed_data$BMI)
## 
##  Pearson's product-moment correlation
## 
## data:  preprocessed_data$cancer_promoting_minus_preventing_FFQ and preprocessed_data$BMI
## t = 3.3607, df = 662, p-value = 0.0008222
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.05396212 0.20359687
## sample estimates:
##       cor 
## 0.1295168
cor.test(preprocessed_data$FFQ_cancer_promoting,preprocessed_data$BMI)
## 
##  Pearson's product-moment correlation
## 
## data:  preprocessed_data$FFQ_cancer_promoting and preprocessed_data$BMI
## t = -1.3209, df = 662, p-value = 0.187
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.12686338  0.02491175
## sample estimates:
##         cor 
## -0.05127185
cor.test(preprocessed_data$FFQ_cancer_preventing,preprocessed_data$BMI)
## 
##  Pearson's product-moment correlation
## 
## data:  preprocessed_data$FFQ_cancer_preventing and preprocessed_data$BMI
## t = -3.9156, df = 662, p-value = 9.952e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.22397532 -0.07522756
## sample estimates:
##        cor 
## -0.1504528
cor.test(preprocessed_data$FFQ_cancer_preventing_log,preprocessed_data$BMI)
## 
##  Pearson's product-moment correlation
## 
## data:  preprocessed_data$FFQ_cancer_preventing_log and preprocessed_data$BMI
## t = -4.3386, df = 662, p-value = 1.658e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.23933539 -0.09134613
## sample estimates:
##        cor 
## -0.1662769
custom_points <- function(...){
  ggally_points(...,alpha=0.2)
}

We should use cancer_preventing as our health behavior–it seems to relate most closely with BMI. That was a bit of a suprise but there you go!

Also–how many of the subcategories of cancer_promoting should we be looking at?

Now let’s look at perceived stress

hist(preprocessed_data$PSS_sum,breaks=30)

OK, now we look at the harder items: community safety and food access.

For food access, and community safety we have MESA:

#library("ggpairs")
pairs(preprocessed_data[c("MESA_freshproduce","MESA_healthyfoodavailability","MESA_lowfat","MESA_mean","MESA_safetyfromcrime", "MESA_neighborhoodconnectedness")])

To measure the effect of community on eating behavior and health, we will use two linear models to predict (1) eating behavior and (2) health outcome from

  1. Community healthy food environment
  1. Community safety
  1. self-report confounding variables PSS-4
  1. demographic confounders relating to age and race

OK great, so now we have the ability to to all of that

#need to tidy race a bit.

predict_bmi <- lm(BMI~
                    MESA_healthyfoodavailability+MESA_safetyfromcrime+PSS_sum+
                    DEMO_Age+Gender+RaceCategorical,
                  preprocessed_data)
summary(predict_bmi)
## 
## Call:
## lm(formula = BMI ~ MESA_healthyfoodavailability + MESA_safetyfromcrime + 
##     PSS_sum + DEMO_Age + Gender + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.242  -5.411  -1.125   3.762  34.613 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 29.098282   2.222169  13.095
## MESA_healthyfoodavailability                 0.752239   0.340728   2.208
## MESA_safetyfromcrime                         0.262359   0.304898   0.860
## PSS_sum                                     -0.814836   0.621326  -1.311
## DEMO_Age                                     0.003293   0.007528   0.437
## GenderMale                                   0.223158   0.655887   0.340
## GenderNon-binary                             4.782014   3.106948   1.539
## GenderOther                                 -6.762901   7.512245  -0.900
## RaceCategoricalAsian                        -0.501463   1.185550  -0.423
## RaceCategoricalHispanicAnyRace              -3.609322   1.277317  -2.826
## RaceCategoricalHawaiianOtherPacificIslander -0.425895   1.716155  -0.248
## RaceCategoricalOtherRace                    -3.251937   1.928196  -1.687
## RaceCategoricalRaceNotReported               0.539436   3.382151   0.159
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## MESA_healthyfoodavailability                 0.02762 *  
## MESA_safetyfromcrime                         0.38985    
## PSS_sum                                      0.19019    
## DEMO_Age                                     0.66196    
## GenderMale                                   0.73379    
## GenderNon-binary                             0.12428    
## GenderOther                                  0.36833    
## RaceCategoricalAsian                         0.67246    
## RaceCategoricalHispanicAnyRace               0.00487 ** 
## RaceCategoricalHawaiianOtherPacificIslander  0.80409    
## RaceCategoricalOtherRace                     0.09219 .  
## RaceCategoricalRaceNotReported               0.87333    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.495 on 627 degrees of freedom
##   (242 observations deleted due to missingness)
## Multiple R-squared:  0.03754,    Adjusted R-squared:  0.01912 
## F-statistic: 2.038 on 12 and 627 DF,  p-value: 0.01923
predict_ffq <- lm(cancer_promoting_minus_preventing_FFQ~
                    MESA_healthyfoodavailability+MESA_safetyfromcrime+PSS_sum+
                    DEMO_Age+Gender+RaceCategorical,
                  preprocessed_data)
summary(predict_ffq)
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ MESA_healthyfoodavailability + 
##     MESA_safetyfromcrime + PSS_sum + DEMO_Age + Gender + RaceCategorical, 
##     data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.20162 -0.27230  0.06276  0.30915  1.74195 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                 -0.1132923  0.1376193  -0.823
## MESA_healthyfoodavailability                 0.0698966  0.0214605   3.257
## MESA_safetyfromcrime                         0.0062599  0.0193595   0.323
## PSS_sum                                     -0.0491422  0.0376907  -1.304
## DEMO_Age                                    -0.0012930  0.0005074  -2.548
## GenderMale                                   0.0407697  0.0417553   0.976
## GenderNon-binary                            -0.1022937  0.1473694  -0.694
## GenderOther                                 -0.1520392  0.5162078  -0.295
## RaceCategoricalAsian                        -0.0897567  0.0754283  -1.190
## RaceCategoricalHispanicAnyRace               0.0982779  0.0784793   1.252
## RaceCategoricalHawaiianOtherPacificIslander -0.1015326  0.1076059  -0.944
## RaceCategoricalOtherRace                    -0.0853195  0.1243573  -0.686
## RaceCategoricalRaceNotReported              -0.0715121  0.2121146  -0.337
##                                             Pr(>|t|)   
## (Intercept)                                  0.41064   
## MESA_healthyfoodavailability                 0.00118 **
## MESA_safetyfromcrime                         0.74652   
## PSS_sum                                      0.19270   
## DEMO_Age                                     0.01103 * 
## GenderMale                                   0.32919   
## GenderNon-binary                             0.48782   
## GenderOther                                  0.76843   
## RaceCategoricalAsian                         0.23444   
## RaceCategoricalHispanicAnyRace               0.21087   
## RaceCategoricalHawaiianOtherPacificIslander  0.34570   
## RaceCategoricalOtherRace                     0.49288   
## RaceCategoricalRaceNotReported               0.73611   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5152 on 739 degrees of freedom
##   (130 observations deleted due to missingness)
## Multiple R-squared:  0.03486,    Adjusted R-squared:  0.01919 
## F-statistic: 2.224 on 12 and 739 DF,  p-value: 0.009423

There is a rural-urban disparity in (a) eating behavior and (b) health outcomes in Oregon, after controlling for age and race but not income or SES.

predict_ffq_rural <- lm(cancer_promoting_minus_preventing_FFQ~RuralZIP+
                    #MESA_healthyfoodavailability+MESA_safetyfromcrime+PSS_sum+
                    DEMO_Age+Gender+RaceCategorical,
                  preprocessed_data)
summary(predict_ffq_rural)
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ RuralZIP + 
##     DEMO_Age + Gender + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.25008 -0.27422  0.06421  0.32314  1.70305 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                 -0.1054614  0.0390570  -2.700
## RuralZIPTRUE                                 0.0286925  0.0389546   0.737
## DEMO_Age                                    -0.0012951  0.0005089  -2.545
## GenderMale                                   0.0332378  0.0419539   0.792
## GenderNon-binary                            -0.0244841  0.1520210  -0.161
## GenderOther                                 -0.1355850  0.5180758  -0.262
## RaceCategoricalAsian                        -0.1119114  0.0762461  -1.468
## RaceCategoricalHispanicAnyRace               0.1016477  0.0792225   1.283
## RaceCategoricalHawaiianOtherPacificIslander -0.1054523  0.1081974  -0.975
## RaceCategoricalOtherRace                    -0.0921164  0.1272990  -0.724
## RaceCategoricalRaceNotReported              -0.0103183  0.2123496  -0.049
##                                             Pr(>|t|)   
## (Intercept)                                  0.00709 **
## RuralZIPTRUE                                 0.46162   
## DEMO_Age                                     0.01114 * 
## GenderMale                                   0.42847   
## GenderNon-binary                             0.87209   
## GenderOther                                  0.79362   
## RaceCategoricalAsian                         0.14260   
## RaceCategoricalHispanicAnyRace               0.19987   
## RaceCategoricalHawaiianOtherPacificIslander  0.33007   
## RaceCategoricalOtherRace                     0.46953   
## RaceCategoricalRaceNotReported               0.96126   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.517 on 733 degrees of freedom
##   (138 observations deleted due to missingness)
## Multiple R-squared:  0.01772,    Adjusted R-squared:  0.004317 
## F-statistic: 1.322 on 10 and 733 DF,  p-value: 0.214
predict_bmi_rural <- lm(BMI~RuralZIP+
                    #MESA_healthyfoodavailability+MESA_safetyfromcrime+PSS_sum+
                    DEMO_Age+Gender3C+RaceCategorical,
                  preprocessed_data)
summary(predict_bmi_rural)
## 
## Call:
## lm(formula = BMI ~ RuralZIP + DEMO_Age + Gender3C + RaceCategorical, 
##     data = preprocessed_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.876  -5.395  -1.302   4.168  36.531 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 28.047519   0.592959  47.301
## RuralZIPTRUE                                 2.089661   0.613042   3.409
## DEMO_Age                                     0.003377   0.007480   0.451
## Gender3CMale                                 0.423035   0.654626   0.646
## Gender3COther                                5.163172   3.078720   1.677
## RaceCategoricalAsian                        -0.647414   1.175332  -0.551
## RaceCategoricalHispanicAnyRace              -3.733464   1.283482  -2.909
## RaceCategoricalHawaiianOtherPacificIslander -0.302829   1.711195  -0.177
## RaceCategoricalOtherRace                    -3.752227   1.959148  -1.915
## RaceCategoricalRaceNotReported               0.948939   3.363304   0.282
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## RuralZIPTRUE                                0.000695 ***
## DEMO_Age                                    0.651812    
## Gender3CMale                                0.518372    
## Gender3COther                               0.094033 .  
## RaceCategoricalAsian                        0.581944    
## RaceCategoricalHispanicAnyRace              0.003757 ** 
## RaceCategoricalHawaiianOtherPacificIslander 0.859590    
## RaceCategoricalOtherRace                    0.055919 .  
## RaceCategoricalRaceNotReported              0.777926    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.469 on 624 degrees of freedom
##   (248 observations deleted due to missingness)
## Multiple R-squared:  0.04009,    Adjusted R-squared:  0.02625 
## F-statistic: 2.896 on 9 and 624 DF,  p-value: 0.002307

The rural-urban disparity can be partially explained by the availability of healthy food in the community.

predict_bmi_rural_2 <- lm(BMI~RuralZIP+
                    MESA_healthyfoodavailability+
                    DEMO_Age+Gender+RaceCategorical,
                  preprocessed_data)
summary(predict_bmi_rural_2)
## 
## Call:
## lm(formula = BMI ~ RuralZIP + MESA_healthyfoodavailability + 
##     DEMO_Age + Gender + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.886  -5.245  -1.131   4.448  35.838 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 26.698002   0.926888  28.804
## RuralZIPTRUE                                 1.863278   0.618899   3.011
## MESA_healthyfoodavailability                 0.640455   0.331534   1.932
## DEMO_Age                                     0.003718   0.007460   0.498
## GenderMale                                   0.435971   0.652425   0.668
## GenderNon-binary                             7.244411   3.358460   2.157
## GenderOther                                 -5.883342   7.461939  -0.788
## RaceCategoricalAsian                        -0.466086   1.175242  -0.397
## RaceCategoricalHispanicAnyRace              -3.723564   1.280132  -2.909
## RaceCategoricalHawaiianOtherPacificIslander -0.242807   1.705747  -0.142
## RaceCategoricalOtherRace                    -3.714034   1.952590  -1.902
## RaceCategoricalRaceNotReported               0.529958   3.358618   0.158
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## RuralZIPTRUE                                 0.00271 ** 
## MESA_healthyfoodavailability                 0.05384 .  
## DEMO_Age                                     0.61839    
## GenderMale                                   0.50423    
## GenderNon-binary                             0.03138 *  
## GenderOther                                  0.43074    
## RaceCategoricalAsian                         0.69181    
## RaceCategoricalHispanicAnyRace               0.00376 ** 
## RaceCategoricalHawaiianOtherPacificIslander  0.88685    
## RaceCategoricalOtherRace                     0.05762 .  
## RaceCategoricalRaceNotReported               0.87467    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.444 on 622 degrees of freedom
##   (248 observations deleted due to missingness)
## Multiple R-squared:  0.04974,    Adjusted R-squared:  0.03293 
## F-statistic: 2.959 on 11 and 622 DF,  p-value: 0.0007743

Need to follow this up with a full mediation model.

#https://www.rdocumentation.org/packages/mediation/versions/4.5.0/topics/mediate
library(mediation)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: Matrix
## Loading required package: mvtnorm
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.5.0
#first need to only get rows that are NA on all the values; need to do this prior because we have two different models and need them using the same rows.
#strictly speaking, only need to add columns not included in both; lm package will consistently eliminate the rest.
exclude_row <- rowSums(is.na(preprocessed_data[,c("BMI","MESA_healthyfoodavailability")]))>0

preprocessed_data$MESA_healthyfoodavailability_ln <- log(preprocessed_data$MESA_healthyfoodavailability)
predict_mediator <- lm(MESA_healthyfoodavailability_ln~RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data[!exclude_row,])
predict_outcome <- lm(BMI~MESA_healthyfoodavailability_ln+RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data[!exclude_row,])

healthy_food_mediation <- mediate(predict_mediator,predict_outcome,boot=FALSE,sims=2000,treat="RuralZIP",mediator="MESA_healthyfoodavailability_ln")
## Warning in mediate(predict_mediator, predict_outcome, boot = FALSE, sims =
## 2000, : treatment and control values do not match factor levels; using FALSE and
## TRUE as control and treatment, respectively

We didn’t pre-register taking the log of MESA_healthyfoodavailability, but it is a reasonable deviation considering its non-normality.

summary(healthy_food_mediation)
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            0.18080      0.01056         0.41   0.040 *  
## ADE             1.92549      0.71584         3.13   0.002 ** 
## Total Effect    2.10630      0.87856         3.31   0.001 ***
## Prop. Mediated  0.08245      0.00441         0.26   0.041 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 634 
## 
## 
## Simulations: 2000

There is a small mediation effect of healthy food availability, but we have to stress: it is particularly small.

And we couldn’t detect the effect from food consumption, which is a bit suspicious.

plot(healthy_food_mediation)

Eating behavior and health outcomes will be influenced by perceived stress (negatively), the Self Report Habit Index (negatively). Health outcomes will be influenced by the short version of the IPAQ (negatively)

predict_health_outcomes <- lm(BMI~PSS_sum+SRHI_healthy+SRHI_unhealthy+IPAQ_total_METminutes,
                  preprocessed_data)
summary(predict_health_outcomes)
## 
## Call:
## lm(formula = BMI ~ PSS_sum + SRHI_healthy + SRHI_unhealthy + 
##     IPAQ_total_METminutes, data = preprocessed_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.015  -4.712  -1.088   3.914  37.049 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           32.2294387  2.3668666  13.617  < 2e-16 ***
## PSS_sum               -0.6135643  0.6015653  -1.020  0.30813    
## SRHI_healthy          -0.1338435  0.0330823  -4.046 5.83e-05 ***
## SRHI_unhealthy         0.0814629  0.0308489   2.641  0.00847 ** 
## IPAQ_total_METminutes -0.0001329  0.0001031  -1.289  0.19769    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.348 on 659 degrees of freedom
##   (218 observations deleted due to missingness)
## Multiple R-squared:  0.07141,    Adjusted R-squared:  0.06577 
## F-statistic: 12.67 on 4 and 659 DF,  p-value: 6.138e-10
predict_eating_behavior <- lm(cancer_promoting_minus_preventing_FFQ~PSS_sum+SRHI_healthy+SRHI_unhealthy+IPAQ_total_METminutes,
                  preprocessed_data)
summary(predict_eating_behavior)
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ PSS_sum + 
##     SRHI_healthy + SRHI_unhealthy + IPAQ_total_METminutes, data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.04083 -0.24541  0.02776  0.27787  1.85396 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            7.442e-02  1.253e-01   0.594   0.5528    
## PSS_sum               -6.393e-02  3.110e-02  -2.055   0.0402 *  
## SRHI_healthy          -1.742e-02  1.735e-03 -10.040   <2e-16 ***
## SRHI_unhealthy         1.439e-02  1.634e-03   8.804   <2e-16 ***
## IPAQ_total_METminutes  6.353e-06  4.877e-06   1.303   0.1930    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4416 on 831 degrees of freedom
##   (46 observations deleted due to missingness)
## Multiple R-squared:  0.2855, Adjusted R-squared:  0.282 
## F-statistic:    83 on 4 and 831 DF,  p-value: < 2.2e-16

Contrary to prediction, IPAQ predicts FFQ but NOT BMI. Suggests a cultural link `health-concerned’ people who work out more and eat more vegetables despite their lack of efficacy??? Or just social class or something.

To measure the rural-urban disparity, we will use a similar linear model without community information but including rurality.

We will then examine whether Community healthy food environment and Community safety mediate the rurality effect.

#https://www.rdocumentation.org/packages/mediation/versions/4.5.0/topics/mediate
library(mediation)
#first need to only get rows that are NA on all the values; need to do this prior because we have two different models and need them using the same rows.
#strictly speaking, only need to add columns not included in both; lm package will consistently eliminate the rest.
exclude_row <- rowSums(is.na(preprocessed_data[,c("BMI","MESA_safetyfromcrime")]))>0
preprocessed_data$RuralZIP<-as.numeric(preprocessed_data$RuralZIP)

preprocessed_data$MESA_safetyfromcrime_ln <- log(preprocessed_data$MESA_safetyfromcrime)
predict_mediator <- lm(MESA_safetyfromcrime_ln~RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data[!exclude_row,])
predict_outcome <- lm(BMI~MESA_safetyfromcrime_ln+RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data[!exclude_row,])

crime_mediation <- mediate(predict_mediator,predict_outcome,boot=FALSE,sims=2000,treat="RuralZIP",mediator="MESA_safetyfromcrime_ln")
summary(crime_mediation)
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            -0.1706      -0.4072         0.02   0.076 .  
## ADE              2.2752       1.0640         3.44  <2e-16 ***
## Total Effect     2.1047       0.9355         3.28  <2e-16 ***
## Prop. Mediated  -0.0761      -0.2715         0.01   0.076 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 634 
## 
## 
## Simulations: 2000
hist(preprocessed_data$MESA_neighborhoodconnectedness)

ggplot(preprocessed_data,aes(x=as.factor(RuralZIP),y=FFQ_cancer_preventing))+geom_violin()+geom_jitter(alpha=0.2)
## Warning: Removed 46 rows containing non-finite values (stat_ydensity).
## Warning: Removed 46 rows containing missing values (geom_point).

summary(lm(cancer_promoting_minus_preventing_FFQ~RuralZIP,preprocessed_data %>% filter(!is.na(RuralZIP) & !is.na(cancer_promoting_minus_preventing_FFQ))))
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ RuralZIP, 
##     data = preprocessed_data %>% filter(!is.na(RuralZIP) & !is.na(cancer_promoting_minus_preventing_FFQ)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.39041 -0.25994  0.06921  0.31459  1.72767 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.18772    0.05601  -3.352 0.000842 ***
## RuralZIP     0.02313    0.03759   0.615 0.538515    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5164 on 780 degrees of freedom
## Multiple R-squared:  0.0004852,  Adjusted R-squared:  -0.0007962 
## F-statistic: 0.3786 on 1 and 780 DF,  p-value: 0.5385
summary(lm(cancer_promoting_minus_preventing_FFQ~RuralZIP,preprocessed_data))
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ RuralZIP, 
##     data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.39041 -0.25994  0.06921  0.31459  1.72767 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.18772    0.05601  -3.352 0.000842 ***
## RuralZIP     0.02313    0.03759   0.615 0.538515    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5164 on 780 degrees of freedom
##   (100 observations deleted due to missingness)
## Multiple R-squared:  0.0004852,  Adjusted R-squared:  -0.0007962 
## F-statistic: 0.3786 on 1 and 780 DF,  p-value: 0.5385
summary(lm(cancer_promoting_minus_preventing_FFQ~RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data))
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ RuralZIP + 
##     DEMO_Age + Gender3C + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.25003 -0.27408  0.06412  0.32370  1.70316 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                 -0.1345492  0.0661680  -2.033
## RuralZIP                                     0.0288789  0.0389188   0.742
## DEMO_Age                                    -0.0012934  0.0005085  -2.543
## Gender3CMale                                 0.0332493  0.0419265   0.793
## Gender3COther                               -0.0330720  0.1461125  -0.226
## RaceCategoricalAsian                        -0.1118633  0.0761959  -1.468
## RaceCategoricalHispanicAnyRace               0.1022593  0.0791154   1.293
## RaceCategoricalHawaiianOtherPacificIslander -0.1053509  0.1081257  -0.974
## RaceCategoricalOtherRace                    -0.0920421  0.1272155  -0.724
## RaceCategoricalRaceNotReported              -0.0102498  0.2122108  -0.048
##                                             Pr(>|t|)  
## (Intercept)                                   0.0424 *
## RuralZIP                                      0.4583  
## DEMO_Age                                      0.0112 *
## Gender3CMale                                  0.4280  
## Gender3COther                                 0.8210  
## RaceCategoricalAsian                          0.1425  
## RaceCategoricalHispanicAnyRace                0.1966  
## RaceCategoricalHawaiianOtherPacificIslander   0.3302  
## RaceCategoricalOtherRace                      0.4696  
## RaceCategoricalRaceNotReported                0.9615  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5166 on 734 degrees of freedom
##   (138 observations deleted due to missingness)
## Multiple R-squared:  0.01766,    Adjusted R-squared:  0.005616 
## F-statistic: 1.466 on 9 and 734 DF,  p-value: 0.1563
summary(lm(cancer_promoting_minus_preventing_FFQ~RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data))
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ RuralZIP + 
##     DEMO_Age + Gender3C + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.25003 -0.27408  0.06412  0.32370  1.70316 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                 -0.1345492  0.0661680  -2.033
## RuralZIP                                     0.0288789  0.0389188   0.742
## DEMO_Age                                    -0.0012934  0.0005085  -2.543
## Gender3CMale                                 0.0332493  0.0419265   0.793
## Gender3COther                               -0.0330720  0.1461125  -0.226
## RaceCategoricalAsian                        -0.1118633  0.0761959  -1.468
## RaceCategoricalHispanicAnyRace               0.1022593  0.0791154   1.293
## RaceCategoricalHawaiianOtherPacificIslander -0.1053509  0.1081257  -0.974
## RaceCategoricalOtherRace                    -0.0920421  0.1272155  -0.724
## RaceCategoricalRaceNotReported              -0.0102498  0.2122108  -0.048
##                                             Pr(>|t|)  
## (Intercept)                                   0.0424 *
## RuralZIP                                      0.4583  
## DEMO_Age                                      0.0112 *
## Gender3CMale                                  0.4280  
## Gender3COther                                 0.8210  
## RaceCategoricalAsian                          0.1425  
## RaceCategoricalHispanicAnyRace                0.1966  
## RaceCategoricalHawaiianOtherPacificIslander   0.3302  
## RaceCategoricalOtherRace                      0.4696  
## RaceCategoricalRaceNotReported                0.9615  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5166 on 734 degrees of freedom
##   (138 observations deleted due to missingness)
## Multiple R-squared:  0.01766,    Adjusted R-squared:  0.005616 
## F-statistic: 1.466 on 9 and 734 DF,  p-value: 0.1563
summary(lm(BMI~MESA_neighborhoodconnectedness+RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data))
## 
## Call:
## lm(formula = BMI ~ MESA_neighborhoodconnectedness + RuralZIP + 
##     DEMO_Age + Gender3C + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.469  -5.339  -1.275   4.262  36.230 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                 24.596378   1.466590  16.771
## MESA_neighborhoodconnectedness               0.463974   0.359318   1.291
## RuralZIP                                     2.130139   0.613515   3.472
## DEMO_Age                                     0.003345   0.007476   0.447
## Gender3CMale                                 0.532683   0.659763   0.807
## Gender3COther                                5.109563   3.077355   1.660
## RaceCategoricalAsian                        -0.710771   1.175729  -0.605
## RaceCategoricalHispanicAnyRace              -3.721550   1.282830  -2.901
## RaceCategoricalHawaiianOtherPacificIslander -0.270018   1.710469  -0.158
## RaceCategoricalOtherRace                    -3.753067   1.958102  -1.917
## RaceCategoricalRaceNotReported               1.145451   3.364950   0.340
##                                             Pr(>|t|)    
## (Intercept)                                  < 2e-16 ***
## MESA_neighborhoodconnectedness              0.197091    
## RuralZIP                                    0.000552 ***
## DEMO_Age                                    0.654718    
## Gender3CMale                                0.419753    
## Gender3COther                               0.097342 .  
## RaceCategoricalAsian                        0.545707    
## RaceCategoricalHispanicAnyRace              0.003850 ** 
## RaceCategoricalHawaiianOtherPacificIslander 0.874617    
## RaceCategoricalOtherRace                    0.055735 .  
## RaceCategoricalRaceNotReported              0.733665    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.465 on 623 degrees of freedom
##   (248 observations deleted due to missingness)
## Multiple R-squared:  0.04266,    Adjusted R-squared:  0.02729 
## F-statistic: 2.776 on 10 and 623 DF,  p-value: 0.002307
summary(lm(cancer_promoting_minus_preventing_FFQ~MESA_neighborhoodconnectedness+RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data))
## 
## Call:
## lm(formula = cancer_promoting_minus_preventing_FFQ ~ MESA_neighborhoodconnectedness + 
##     RuralZIP + DEMO_Age + Gender3C + RaceCategorical, data = preprocessed_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.23588 -0.27512  0.06902  0.31449  1.75899 
## 
## Coefficients:
##                                               Estimate Std. Error t value
## (Intercept)                                 -0.3272748  0.0943535  -3.469
## MESA_neighborhoodconnectedness               0.0658014  0.0230722   2.852
## RuralZIP                                     0.0343534  0.0387785   0.886
## DEMO_Age                                    -0.0012830  0.0005061  -2.535
## Gender3CMale                                 0.0489698  0.0420868   1.164
## Gender3COther                               -0.0258278  0.1454298  -0.178
## RaceCategoricalAsian                        -0.1204122  0.0758876  -1.587
## RaceCategoricalHispanicAnyRace               0.1004693  0.0787362   1.276
## RaceCategoricalHawaiianOtherPacificIslander -0.0982498  0.1076328  -0.913
## RaceCategoricalOtherRace                    -0.0972747  0.1266150  -0.768
## RaceCategoricalRaceNotReported               0.0133445  0.2113489   0.063
##                                             Pr(>|t|)    
## (Intercept)                                 0.000554 ***
## MESA_neighborhoodconnectedness              0.004467 ** 
## RuralZIP                                    0.375969    
## DEMO_Age                                    0.011448 *  
## Gender3CMale                                0.244988    
## Gender3COther                               0.859089    
## RaceCategoricalAsian                        0.113008    
## RaceCategoricalHispanicAnyRace              0.202351    
## RaceCategoricalHawaiianOtherPacificIslander 0.361635    
## RaceCategoricalOtherRace                    0.442573    
## RaceCategoricalRaceNotReported              0.949673    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5141 on 733 degrees of freedom
##   (138 observations deleted due to missingness)
## Multiple R-squared:  0.02844,    Adjusted R-squared:  0.01519 
## F-statistic: 2.146 on 10 and 733 DF,  p-value: 0.01932

Interesting: connectedness is very strongly related to cancer_promoting_minus_preventing_FFQ…but it’s not related to rurality.

#https://www.rdocumentation.org/packages/mediation/versions/4.5.0/topics/mediate
library(mediation)
#first need to only get rows that are NA on all the values; need to do this prior because we have two different models and need them using the same rows.
#strictly speaking, only need to add columns not included in both; lm package will consistently eliminate the rest.
exclude_row <- rowSums(is.na(preprocessed_data[,c("FFQ_cancer_preventing","MESA_neighborhoodconnectedness")]))>0
preprocessed_data$RuralZIP<-as.numeric(preprocessed_data$RuralZIP)


predict_mediator <- lm(MESA_neighborhoodconnectedness~RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data[!exclude_row,])
predict_outcome <- lm(FFQ_cancer_preventing~MESA_neighborhoodconnectedness+RuralZIP+DEMO_Age+Gender3C+RaceCategorical,preprocessed_data[!exclude_row,])

connectedness_mediation <- mediate(predict_mediator,predict_outcome,boot=FALSE,sims=2000,treat="RuralZIP",mediator="MESA_neighborhoodconnectedness")
summary(connectedness_mediation)
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value
## ACME            0.01943     -0.00746         0.05    0.16
## ADE            -0.01578     -0.11331         0.08    0.75
## Total Effect    0.00365     -0.09595         0.11    0.94
## Prop. Mediated  0.10861     -6.77636         6.75    0.88
## 
## Sample Size Used: 744 
## 
## 
## Simulations: 2000

We will measure two-way correlations between eating behavior, health outcomes, PSS-4, the SRHI, and the IPAQ.

ggpairs(preprocessed_data[c("cancer_preventing_FFQ","cancer_promoting_FFQ","BMI","PSS_sum","SRHI_healthy","SRHI_unhealthy","IPAQ_walkingminutes","IPAQ_moderateminutes","IPAQ_vigorousminutes","IPAQ_sittinghours")],
        progress = FALSE
        ,lower = list(continuous = custom_points))
## Warning: Removed 46 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values
## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 46 rows containing missing values
## Warning: Removed 218 rows containing missing values (geom_point).

## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 218 rows containing missing values
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing non-finite values (stat_density).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 18 rows containing missing values
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 46 rows containing missing values (geom_point).

## Warning: Removed 46 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).

Conclusions re: pre-registered analyses.

General hypotheses on factors afecting eatint behavior and health outcomes.

We hypothesized that people in safer [H1a] and more heathily provisioned [H1b] communities will have better eating behavior.

Following a pre-registered analysis plan, we found that people scoring higher on MESA healthy food availability reported WORSE eating behavior - higher amounts of cancer promoting relative to preventing foods on the FFQ (beta=0.070, p=0.001). probably suggests MESA food availability is wrongly reverse-scored!?!?! Community safety was unrelated

We also hypothesized that people in safer [H2a] and more healthily provisioned [H2b] communities will have lower BMIs.

We found that MESA_healthyfoodavailability was related to HIGHER BMIs (again suggesting a problem with reverse-scoring) (beta=0.075, p=0.03) but community safety was unrelated.

Prediction of rural-urban dispartiy

We found a rural-urban disparity in health outcomes [H3a], but not eating behavior[H3b], after controlling for age, race, and gender. Rurality predicted a higher BMI (b=2.05, se=0.61, p<0.001), but not cancer_promoting_minus_preventing_FFQ (p=0.46).

In the model, Hispanic ethnicity predicted lower BMI (beta=-3.79, p<0.01) in our sample. No other age, race, or gender effects were found. There were insufficient numbers of Black respondents to measure any association with Black racial identification.

Explaining the rural-urban disparity.

We predicted disparities could be explained by the availability of healthy food in the community (H4). We didn’t test mediation effect of eating behavior because the main effect was not significant.

Confirmation here was marginal; a model predicting BMI from Rurality and healthy food availability found healthy food availability marginally predicted BMI (\(p=0.05\)). A mediation analysis found the proportion of the effect of rurality on BMI mediated by healthy food availability to be just 8.0% (CI=[0.7%, 26%], \(p=0.03\)).

Supplementary predictions

We predicted that perceived stress (measured by the 4-item PSS) and the SRHI would negatively affect eating behavior and health outcomes.

Partially confirming the hypothesis, in a linear model of BMI, healthy (p<0.001) and unhealthy (p<0.01) components of the SRHI strongly affected the health outcome, but PSS did not significantly affect the health outcome.

In a linear model of unhealthy food consumption, contrary to our hypothesis, PSS negatively predicted unhealthy eating (p<0.05) suggesting that respondents reporting higher stress ate more healthily. Consistent with our hypothesis, healthy (p<0.001) and unhealthy (p<0.001) componetns of the SRHI strongly affected the measure of eating behavior.

Overall, the link between SHRI and BMI confirms the role of SHRI in health outcomes. More examination is needed into the role of stress in healthy eating.

We also predicted that IPAQ–a measure of exercise– would be affected by health outcomes, though not eating behavior